import pandas as pd
import numpy as np
from datetime import datetime

import warnings
# NOTE(review): this silences *every* warning, including pandas deprecation
# warnings; consider narrowing the filter once the script is warning-clean.
warnings.filterwarnings('ignore')

# strptime/strftime pattern of the `date` column in the input CSV.
DATE_FORMAT = '%Y-%m-%d'


def dateparse(x):
    """Parse a single 'YYYY-MM-DD' string into a ``datetime``.

    No longer passed to ``read_csv`` (its ``date_parser`` argument is
    deprecated); kept so that any other code relying on this name still works.
    """
    return datetime.strptime(x, DATE_FORMAT)


# Suffix that selects which narratives file to load.
c = 1000

df = pd.read_csv('../data/gpo_final_data/narratives_complete_with_metadata_manual_labels_rich_{0}.csv'.format(c))

# Convert the date column after loading instead of using the deprecated
# `date_parser=` argument of read_csv; an explicit format keeps parsing strict
# and vectorised.
df['date'] = pd.to_datetime(df['date'], format=DATE_FORMAT)

# Add year field
df['year'] = df['date'].dt.year

# Get narratives with most polar sentiment.
# Select the score column *before* aggregating: averaging the whole frame
# fails on non-numeric columns in pandas >= 2.0 and does needless work. The
# per-narrative mean is computed once and reused for both rankings.
mean_sentiment = df.groupby(['narrative'])['sentiment_compound'].mean()

# Ten highest means, ordered from least to most positive.
pos_narratives = list(mean_sentiment.nlargest(10).sort_values(ascending=True).index)
# Ten lowest means, ordered from most to least negative.
neg_narratives = list(mean_sentiment.nsmallest(10).index)
narratives_to_plot = pos_narratives + neg_narratives

cleaned_df = df[df['narrative'].isin(narratives_to_plot)]

# One row per narrative (the first occurrence in file order).
# NOTE(review): the exported `sentiment_compound` is therefore that first
# row's value, not the group mean used for the ranking above -- confirm this
# is intended.
unique_df = cleaned_df.drop_duplicates(subset=['narrative'])

# Re-order the rows to follow `narratives_to_plot`. The pieces are collected
# and concatenated once, because `DataFrame.append` was removed in pandas 2.0
# (and re-copied the whole frame on every iteration).
output_columns = ['narrative', 'sentiment_compound']
ordered_rows = [
    unique_df.loc[unique_df['narrative'] == narrative, output_columns]
    for narrative in narratives_to_plot
]
if ordered_rows:
    unique_df_sorted = pd.concat(ordered_rows, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep writing a header-only CSV.
    unique_df_sorted = pd.DataFrame(columns=output_columns)

unique_df_sorted.to_csv('../data/temp/sentiment_narratives.csv', index=False)
